<?php
/**
 * Splits free text into lower-cased alphanumeric search terms, discarding
 * stop words and hexadecimal-looking noise tokens.
 */
class mcz_Analyzer
{
    /**
     * Lower-cased tokens that analyze() never returns: very common short
     * words, URL fragments, and a few specific hexadecimal strings.
     *
     * 's' is listed because analyze() replaces apostrophes with spaces, so a
     * possessive such as "John's" yields a standalone "s" token.
     *
     * @var string[]
     */
    private static $stopWords = array(
        'los','las','el','the','of','and','le','de','a','des','une','un','s','is',
        'www','http','com','org','00000000','00008000','000138c5','00029855',
        '0001adc7','00013897','0001adde','0001fbe7'
    );

    /**
     * Tokenizes $text into lower-cased terms made only of [a-z0-9].
     *
     * Processing steps:
     *  - the text is passed through mcz_Inflector::removeDiacritics();
     *  - hyphens and apostrophes become spaces, other quote characters are
     *    removed (joining the surrounding characters), and every remaining
     *    non-alphanumeric byte becomes a space;
     *  - the text is split on single spaces and each non-empty token is
     *    lower-cased;
     *  - tokens found in self::$stopWords, or starting with "000", are dropped.
     *
     * All comparisons are strict string comparisons, so purely numeric tokens
     * such as "0" or "8000" are kept unless they literally match a stop word
     * or literally start with "000".
     *
     * @param string $text Raw text to analyze.
     *
     * @return string[] Retained terms, keyed by their position in the
     *                  space-split text. Keys are therefore not necessarily
     *                  contiguous; use array_values() if a list is needed.
     */
    public static function analyze($text)
    {
        $text = mcz_Inflector::removeDiacritics($text);
        $text = str_replace(array('-', '\''), array(' ', ' '), $text);
        $text = preg_replace('/[\'`´"]/', '', $text);
        $text = preg_replace('/[^A-Za-z0-9]/', ' ', $text);
        // Collapses one level of doubled spaces; any empty tokens that remain
        // are skipped in the loop below.
        $text = str_replace('  ', ' ', $text);
        $terms = explode(' ', $text);

        $ret = array();
        foreach ($terms as $i => $term)
        {
            // Strict test: empty() would also discard the legitimate token "0".
            if ($term === '')
            {
                continue;
            }

            $lower = strtolower($term);

            // Strict lookup: a loose in_array() compares numeric strings by
            // value, so e.g. "8000" would match the stop word "00008000".
            if (in_array($lower, self::$stopWords, true))
            {
                continue;
            }

            // Remove most of the hexadecimal strings found in iTunes comments.
            // strncmp() compares bytes, so short numeric tokens such as "0"
            // are not mistaken for "000".
            if (strncmp($lower, '000', 3) === 0)
            {
                continue;
            }

            $ret[$i] = $lower;
        }

        return $ret;
    }
}